

<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <meta charset="utf-8" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0" />

  <title>Data Augment &mdash; Openspeech v0.3.0 documentation</title>



  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />










  <!--[if lt IE 9]>
    <script src="../_static/js/html5shiv.min.js"></script>
  <![endif]-->


      <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
        <script src="../_static/jquery.js"></script>
        <script src="../_static/underscore.js"></script>
        <script src="../_static/doctools.js"></script>
        <script src="../_static/language_data.js"></script>

    <script type="text/javascript" src="../_static/js/theme.js"></script>


    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="Feature Transform" href="Feature Transform.html" />
    <link rel="prev" title="Criterion" href="Criterion.html" />
</head>

<body class="wy-body-for-nav">


  <div class="wy-grid-for-nav">

    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >



            <a href="../index.html" class="icon icon-home"> Openspeech



          </a>







<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>


        </div>


        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">






              <p class="caption"><span class="caption-text">GETTING STARTED</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../notes/intro.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../notes/hydra_configs.html">Openspeech’s Hydra configuration</a></li>
<li class="toctree-l1"><a class="reference internal" href="../notes/configs.html">Openspeech’s configurations</a></li>
</ul>
<p class="caption"><span class="caption-text">OPENSPEECH MODELS</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Model.html">Openspeech Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech CTC Model.html">Openspeech CTC Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Encoder Decoder Model.html">Openspeech Encoder Decoder Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Transducer Model.html">Openspeech Transducer Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Language Model.html">Openspeech Language Model</a></li>
</ul>
<p class="caption"><span class="caption-text">MODEL ARCHITECTURES</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Conformer.html">Conformer</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/ContextNet.html">ContextNet</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/DeepSpeech2.html">DeepSpeech2</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Jasper.html">Jasper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Listen Attend Spell.html">Listen Attend Spell Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/LSTM LM.html">LSTM Language Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/QuartzNet.html">QuartzNet Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/RNN Transducer.html">RNN Transducer Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Transformer.html">Transformer Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Transformer LM.html">Transformer Language Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Transformer Transducer.html">Transformer Transducer Model</a></li>
</ul>
<p class="caption"><span class="caption-text">CORPUS</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../corpus/AISHELL-1.html">AISHELL</a></li>
<li class="toctree-l1"><a class="reference internal" href="../corpus/KsponSpeech.html">KsponSpeech</a></li>
<li class="toctree-l1"><a class="reference internal" href="../corpus/LibriSpeech.html">LibriSpeech</a></li>
</ul>
<p class="caption"><span class="caption-text">LIBRARY REFERENCE</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="Callback.html">Callback</a></li>
<li class="toctree-l1"><a class="reference internal" href="Criterion.html">Criterion</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Data Augment</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#module-openspeech.data.audio.augment">Augment</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="Feature Transform.html">Feature Transform</a></li>
<li class="toctree-l1"><a class="reference internal" href="Datasets.html">Datasets</a></li>
<li class="toctree-l1"><a class="reference internal" href="Data Loaders.html">Data Loaders</a></li>
<li class="toctree-l1"><a class="reference internal" href="Decoders.html">Decoders</a></li>
<li class="toctree-l1"><a class="reference internal" href="Encoders.html">Encoders</a></li>
<li class="toctree-l1"><a class="reference internal" href="Modules.html">Modules</a></li>
<li class="toctree-l1"><a class="reference internal" href="Optim.html">Optim</a></li>
<li class="toctree-l1"><a class="reference internal" href="Search.html">Search</a></li>
<li class="toctree-l1"><a class="reference internal" href="Tokenizers.html">Tokenizers</a></li>
<li class="toctree-l1"><a class="reference internal" href="Metric.html">Metric</a></li>
</ul>



        </div>

      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">


      <nav class="wy-nav-top" aria-label="top navigation">

          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">Openspeech</a>

      </nav>


      <div class="wy-nav-content">

        <div class="rst-content">



















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">

      <li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>

      <li>Data Augment</li>


      <li class="wy-breadcrumbs-aside">


            <a href="../_sources/modules/Data Augment.rst.txt" rel="nofollow"> View page source</a>


      </li>

  </ul>


  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">

  <div class="section" id="data-augment">
<h1>Data Augment<a class="headerlink" href="#data-augment" title="Permalink to this headline">¶</a></h1>
<div class="section" id="module-openspeech.data.audio.augment">
<span id="augment"></span><h2>Augment<a class="headerlink" href="#module-openspeech.data.audio.augment" title="Permalink to this headline">¶</a></h2>
<dl class="py class">
<dt id="openspeech.data.audio.augment.JoiningAugment">
<em class="property">class </em><code class="sig-prename descclassname">openspeech.data.audio.augment.</code><code class="sig-name descname">JoiningAugment</code><a class="reference internal" href="../_modules/openspeech/data/audio/augment.html#JoiningAugment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#openspeech.data.audio.augment.JoiningAugment" title="Permalink to this definition">¶</a></dt>
<dd><p>Data augmentation by concatenating audio signals.</p>
<dl class="simple">
<dt>Inputs:</dt><dd><p>signal: np.ndarray [shape=(n,)] audio time series</p>
</dd>
<dt>Returns: signal</dt><dd><ul class="simple">
<li><p><strong>signal</strong>: concatenated signal</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py class">
<dt id="openspeech.data.audio.augment.NoiseInjector">
<em class="property">class </em><code class="sig-prename descclassname">openspeech.data.audio.augment.</code><code class="sig-name descname">NoiseInjector</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">noise_dataset_dir</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.9)">str</a></span></em>, <em class="sig-param"><span class="n">sample_rate</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)">int</a></span> <span class="o">=</span> <span class="default_value">16000</span></em>, <em class="sig-param"><span class="n">noise_level</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.9)">float</a></span> <span class="o">=</span> <span class="default_value">0.7</span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/openspeech/data/audio/augment.html#NoiseInjector"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#openspeech.data.audio.augment.NoiseInjector" title="Permalink to this definition">¶</a></dt>
<dd><p>Provides noise injection for noise augmentation.</p>
<dl class="simple">
<dt>The noise augmentation process is as follows:</dt><dd><ol class="arabic simple">
<li><p>Randomly sample audios by <cite>noise_size</cite> from dataset</p></li>
<li><p>Extract noise from <cite>audio_paths</cite></p></li>
<li><p>Add noise to sound</p></li>
</ol>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>noise_dataset_dir</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.9)"><em>str</em></a>) – path of noise dataset</p></li>
<li><p><strong>sample_rate</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)"><em>int</em></a>) – sampling rate</p></li>
<li><p><strong>noise_level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.9)"><em>float</em></a>) – level of noise</p></li>
</ul>
</dd>
</dl>
<dl class="simple">
<dt>Inputs: signal</dt><dd><ul class="simple">
<li><p><strong>signal</strong>: signal from audio file</p></li>
</ul>
</dd>
<dt>Returns: signal</dt><dd><ul class="simple">
<li><p><strong>signal</strong>: noise added signal</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py class">
<dt id="openspeech.data.audio.augment.SpecAugment">
<em class="property">class </em><code class="sig-prename descclassname">openspeech.data.audio.augment.</code><code class="sig-name descname">SpecAugment</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">freq_mask_para</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)">int</a></span> <span class="o">=</span> <span class="default_value">18</span></em>, <em class="sig-param"><span class="n">time_mask_num</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)">int</a></span> <span class="o">=</span> <span class="default_value">10</span></em>, <em class="sig-param"><span class="n">freq_mask_num</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)">int</a></span> <span class="o">=</span> <span class="default_value">2</span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/openspeech/data/audio/augment.html#SpecAugment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#openspeech.data.audio.augment.SpecAugment" title="Permalink to this definition">¶</a></dt>
<dd><p>Provides SpecAugment, a simple data augmentation method for speech recognition.
This concept was proposed in <a class="reference external" href="https://arxiv.org/abs/1904.08779">https://arxiv.org/abs/1904.08779</a>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>freq_mask_para</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)"><em>int</em></a>) – maximum frequency masking length</p></li>
<li><p><strong>time_mask_num</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)"><em>int</em></a>) – how many times to apply time masking</p></li>
<li><p><strong>freq_mask_num</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)"><em>int</em></a>) – how many times to apply frequency masking</p></li>
</ul>
</dd>
</dl>
<dl class="simple">
<dt>Inputs: feature_vector</dt><dd><ul class="simple">
<li><p><strong>feature_vector</strong> (torch.FloatTensor): feature vector from audio file.</p></li>
</ul>
</dd>
<dt>Returns: feature_vector</dt><dd><ul class="simple">
<li><p><strong>feature_vector</strong>: masked feature vector.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py class">
<dt id="openspeech.data.audio.augment.TimeStretchAugment">
<em class="property">class </em><code class="sig-prename descclassname">openspeech.data.audio.augment.</code><code class="sig-name descname">TimeStretchAugment</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">min_rate</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.9)">float</a></span> <span class="o">=</span> <span class="default_value">0.7</span></em>, <em class="sig-param"><span class="n">max_rate</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.9)">float</a></span> <span class="o">=</span> <span class="default_value">1.4</span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/openspeech/data/audio/augment.html#TimeStretchAugment"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#openspeech.data.audio.augment.TimeStretchAugment" title="Permalink to this definition">¶</a></dt>
<dd><p>Time-stretch an audio series by a fixed rate.</p>
<dl class="simple">
<dt>Inputs:</dt><dd><p>signal: np.ndarray [shape=(n,)] audio time series</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p><strong>y_stretch</strong> – [shape=(round(n/rate),)] audio time series stretched by the specified rate</p>
</dd>
<dt class="field-even">Return type</dt>
<dd class="field-even"><p>np.ndarray</p>
</dd>
</dl>
</dd></dl>

</div>
</div>


           </div>

          </div>
          <footer>
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
        <a href="Feature Transform.html" class="btn btn-neutral float-right" title="Feature Transform" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
        <a href="Criterion.html" class="btn btn-neutral float-left" title="Criterion" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
    </div>

  <hr/>

  <div role="contentinfo">
    <p>
        &#169; Copyright 2021, Kim, Soohwan and Ha, Sangchun and Cho, Soyoung.

    </p>
  </div>



    Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a

    <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>

    provided by <a href="https://readthedocs.org">Read the Docs</a>.

</footer>
        </div>
      </div>

    </section>

  </div>


  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>






</body>
</html>